# packages
library("rio")     # data import/export

# preparation of conjoint data

# Read the raw .sav data file and stop immediately if it does not have
# the expected dimensions (1635 rows, 167 columns).
dat_bbc <- rio::import(file = "bbc-data-raw.sav")
stopifnot(dim(dat_bbc) == c(1635, 167))

# names(dat_bbc)
# codebook <- lapply(dat_bbc, attr, "label")

## features
# Keep every column whose name contains "_seen<digit>", then shorten
# the marker in the column names: "_seen1_t" -> "A", "_seen2_t" -> "B".
features <- dat_bbc[, grep("_seen[[:digit:]]{1}", names(dat_bbc))]
names(features) <- sub(
  "_seen2_t", "B",
  sub("_seen1_t", "A", names(features))
)

## demographics
# All columns from the first one up to and including "partyid_2017",
# followed by the rl2, rl3 and rl4 items.
demographics <- dat_bbc[, c(
  names(dat_bbc)[seq_len(which(names(dat_bbc) == "partyid_2017"))],
  "rl2", "rl3", "rl4"
)]

## outcomes
# The columns ending in "QPAIR<digit>" record which name was chosen;
# they are renamed outcome1 ... outcome5.
outcomes <- dat_bbc[, grep("QPAIR[[:digit:]]{1}$", names(dat_bbc), value = TRUE)]
names(outcomes) <- paste0("outcome", 1:5)

# Timing columns ("page_Intro<digit>_timing"), renamed timing1 ... timing5.
timing <- dat_bbc[, grepl("page_Intro[[:digit:]]{1}_timing", names(dat_bbc))]
names(timing) <- paste0("timing", 1:5)

# Bind the pieces side by side: features, outcomes, timing, demographics.
conj <- cbind(features, outcomes, timing, demographics)

# Column-binding must not change the number of rows.
stopifnot(nrow(conj) == nrow(dat_bbc))

# The separate pieces are no longer needed.
rm(features, outcomes, timing, demographics)

# stack profiles
# Reshape wide -> long over the "pair" index: each input row becomes five
# rows (checked below). Every stem selects the wide columns whose names
# start with it; the order of the stems fixes the order of the stacked
# variables.
profile_stems <- c(
  "nameA", "nameB",
  "ageA", "ageB",
  "experienceA", "experienceB",
  "degreeA", "degreeB",
  "politicsA", "politicsB",
  "euA", "euB",
  "occupationA", "occupationB",
  "timing", "outcome"
)
conj2 <- reshape(
  conj,
  varying = lapply(
    paste0("^", profile_stems),
    grep,
    x = names(conj),
    value = TRUE
  ),
  # the stacked "politics" columns are named "partyA" / "partyB"
  v.names = sub("^politics", "party", profile_stems),
  timevar = "pair",
  idvar = "ID",
  direction = "long"
)
stopifnot(nrow(conj2) == 5 * nrow(conj))
conj <- conj2
rm(conj2, profile_stems)

# Identifier for one displayed pair: "<ID>_<pair>".
conj[["id_pair"]] <- paste(conj[["ID"]], conj[["pair"]], sep = "_")

# The raw outcome stores which name was chosen. Turn it into one 0/1
# indicator per option (A and B) saying whether that option's name was
# the chosen one, then drop the raw outcome column.
for (option in c("A", "B")) {
  chosen <- conj[["outcome"]] == conj[[paste0("name", option)]]
  conj[[paste0("outcome", option)]] <- as.integer(chosen)
}
conj[["outcome"]] <- NULL
rm(option, chosen)

# stack a/b options
# Reshape wide -> long over the A/B options: each row becomes two rows
# (checked below), with "AB" indexing the option. For every attribute the
# pair of columns "<attribute>A" / "<attribute>B" collapses into a single
# column "<attribute>".
attribute_names <- c(
  "name", "age", "experience", "degree",
  "party", "eu", "occupation", "outcome"
)
conj2 <- reshape(
  conj,
  varying = lapply(attribute_names, paste0, c("A", "B")),
  v.names = attribute_names,
  timevar = "AB",
  idvar = "id_pair",
  direction = "long"
)
stopifnot(nrow(conj2) == 2 * nrow(conj))
conj <- conj2
rm(conj2, attribute_names)

# convert feature variables to labeled factors

# These five attributes are converted with rio::factorize(); the levels
# "Skipped" and "Not Asked" are then set to NA.
labelled_features <- c("name", "age", "experience", "degree", "occupation")
conj[labelled_features] <- lapply(
  conj[labelled_features],
  function(column) {
    column <- rio::factorize(column)
    levels(column)[levels(column) %in% c("Skipped", "Not Asked")] <- NA_character_
    column
  }
)
rm(labelled_features)

# party and eu get explicit level orders and labels, with the
# "Didn't support ..." category as the first level.
party_labels <- c(
  "Didn't support a party at the 2017 election",
  "Supported the Labour Party at the 2017 election",
  "Supported the Conservative Party at the 2017 election"
)
conj[["party"]] <- factor(conj[["party"]], levels = c(3, 2, 1), labels = party_labels)

eu_labels <- c(
  "Didn't support a side in the EU referendum",
  "Supported the Remain campaign in the EU referendum",
  "Supported the Leave campaign in the EU referendum"
)
conj[["eu"]] <- factor(conj[["eu"]], levels = c(3, 1, 2), labels = eu_labels)
rm(party_labels, eu_labels)



# Identity variables
# id_party: the characterized partyid_2017 answer, except that
# "Don't know" / "No – None" answers are replaced by the characterized
# rl2 answer.
party_2017 <- rio::characterize(conj[["partyid_2017"]])
conj[["id_party"]] <- ifelse(
  party_2017 %in% c("Don't know", "No – None"),
  rio::characterize(conj[["rl2"]]),
  party_2017
)
rm(party_2017)

# id_brexit: the characterized rl3 answer.
conj[["id_brexit"]] <- rio::characterize(conj[["rl3"]])

# export
# Write the stacked conjoint data to an .rds file.
rio::export(x = conj, file = "conjoint-bbc-stacked.rds")

# cleanup
# Remove the objects this script created.
rm(conj, dat_bbc)
